samplesheet-parser 0.3.2__tar.gz → 0.3.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/.github/workflows/ci.yml +15 -0
  2. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/.gitignore +2 -0
  3. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/CHANGELOG.md +54 -0
  4. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/CITATION.cff +14 -4
  5. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/PKG-INFO +78 -3
  6. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/README.md +77 -2
  7. samplesheet_parser-0.3.4/images/samplesheet_parser_arch_v03.png +0 -0
  8. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/pyproject.toml +1 -1
  9. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/__init__.py +2 -0
  10. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/cli.py +105 -1
  11. samplesheet_parser-0.3.4/samplesheet_parser/index_utils.py +184 -0
  12. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/merger.py +12 -2
  13. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/validators.py +9 -1
  14. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_cli.py +155 -0
  15. samplesheet_parser-0.3.4/tests/test_index_utils.py +163 -0
  16. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_validators/test_validators.py +57 -0
  17. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/.github/workflows/copilot-instructions.md +0 -0
  18. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/.zenodo.json +0 -0
  19. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/CONTRIBUTING.md +0 -0
  20. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/LICENSE +0 -0
  21. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/demo_merger.py +0 -0
  22. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/parse_examples.py +0 -0
  23. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/ProjectAlpha_SampleSheet.csv +0 -0
  24. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/ProjectBeta_SampleSheet.csv +0 -0
  25. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/ProjectDelta_SampleSheet_collision.csv +0 -0
  26. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/ProjectGamma_SampleSheet.csv +0 -0
  27. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/README.md +0 -0
  28. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/combined_clean.csv +0 -0
  29. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/combined_collision_forced.csv +0 -0
  30. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/combined_mixed_formats.csv +0 -0
  31. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v1_dual_index.csv +0 -0
  32. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v1_multi_lane.csv +0 -0
  33. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v1_single_index.csv +0 -0
  34. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v1_with_lab_qc_settings.csv +0 -0
  35. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v1_with_manifests.csv +0 -0
  36. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v2_nextseq_single_index.csv +0 -0
  37. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v2_novaseq_x_dual_index.csv +0 -0
  38. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v2_with_cloud_settings.csv +0 -0
  39. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v2_with_index_umi.csv +0 -0
  40. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v2_with_pipeline_settings.csv +0 -0
  41. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/examples/sample_sheets/v2_with_read_umi.csv +0 -0
  42. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/images/samplesheet_parser_overview.png +0 -0
  43. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/converter.py +0 -0
  44. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/diff.py +0 -0
  45. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/enums.py +0 -0
  46. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/factory.py +0 -0
  47. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/parsers/__init__.py +0 -0
  48. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/parsers/v1.py +0 -0
  49. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/parsers/v2.py +0 -0
  50. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/samplesheet_parser/writer.py +0 -0
  51. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/scripts/demo_converter.py +0 -0
  52. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/scripts/demo_diff.py +0 -0
  53. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/scripts/demo_writer.py +0 -0
  54. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/__init__.py +0 -0
  55. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/conftest.py +0 -0
  56. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/fixtures/SampleSheet_v1_dual_index.csv +0 -0
  57. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/fixtures/SampleSheet_v2_dual_index.csv +0 -0
  58. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/fixtures/SampleSheet_v2_modified.csv +0 -0
  59. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_converter.py +0 -0
  60. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_diff.py +0 -0
  61. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_factory.py +0 -0
  62. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_merger.py +0 -0
  63. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_parsers/__init__.py +0 -0
  64. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_parsers/test_v1.py +0 -0
  65. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_parsers/test_v2.py +0 -0
  66. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_validators/__init__.py +0 -0
  67. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_validators/test_hamming.py +0 -0
  68. {samplesheet_parser-0.3.2 → samplesheet_parser-0.3.4}/tests/test_writer.py +0 -0
@@ -67,3 +67,18 @@ jobs:
67
67
  uses: pypa/gh-action-pypi-publish@release/v1
68
68
  with:
69
69
  password: ${{ secrets.PYPI_API_TOKEN }}
70
+
71
+ release:
72
+ needs: publish
73
+ runs-on: ubuntu-latest
74
+ if: startsWith(github.ref, 'refs/tags/v')
75
+ permissions:
76
+ contents: write
77
+
78
+ steps:
79
+ - uses: actions/checkout@v4
80
+
81
+ - name: Create GitHub Release
82
+ uses: softprops/action-gh-release@v2
83
+ with:
84
+ generate_release_notes: true
@@ -2,6 +2,8 @@
2
2
  BLOGPOST.md
3
3
  tests/fixtures/outputs/
4
4
  demo_output.txt
5
+ **/CSBJ Submission/
6
+ **/.claude/
5
7
 
6
8
  # Cache and build artifacts
7
9
  __pycache__/
@@ -6,6 +6,60 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6
6
 
7
7
  ---
8
8
 
9
+ ## [0.3.4] - 2026-04-04
10
+
11
+ ### Added
12
+
13
+ - **`samplesheet info` CLI command** — prints a concise summary of any V1 or
14
+ V2 sample sheet (format, sample count, lanes, index type, read lengths,
15
+ adapters, experiment name, instrument). Supports `--format json` for
16
+ machine-readable output; exits 0 on success, 2 on unreadable files.
17
+
18
+ - **Configurable Hamming distance threshold** — `SampleSheetValidator.validate()`
19
+ now accepts a `min_hamming_distance` keyword argument (default: 3) so labs
20
+ using longer indexes can enforce stricter thresholds without changing the
21
+ module-level constant.
22
+ - `SampleSheetMerger` accepts the same parameter in `__init__()` and applies
23
+ it to both the intra-sheet and cross-sheet Hamming checks as well as the
24
+ post-merge validation step.
25
+ - `samplesheet validate` exposes `--min-hamming N` (must be ≥ 1; exits 2 on
26
+ invalid input). The JSON output includes `min_hamming_distance` for
27
+ auditability.
28
+
29
+ - **`normalize_index_lengths()` utility** — normalizes index sequence lengths
30
+ across a list of sample dicts (output of `sheet.samples()`) to a consistent
31
+ length before merging sheets with mixed-length indexes.
32
+ - `strategy="trim"` — trims all indexes to the shortest sequence length.
33
+ - `strategy="pad"` — pads shorter indexes to the longest length using `"N"`
34
+ wildcard characters (supported by BCLConvert ≥ 3.9 and bcl2fastq ≥ 2.20).
35
+ - Auto-detects V1-style (`index`/`index2`) and V2-style (`Index`/`Index2`)
36
+ field names; explicit `index1_key`/`index2_key` overrides supported.
37
+ - Exported from the top-level package as `normalize_index_lengths`.
38
+
39
+ - **CI / pre-commit integration guide** in README — GitHub Actions workflow
40
+ and pre-commit hook configuration for automatic sample sheet validation on
41
+ every commit or pull request that touches a `SampleSheet.csv`.
42
+
43
+ ### Fixed
44
+
45
+ - `_detect_key()` in `index_utils` now selects the key with at least one
46
+ non-empty value before falling back to key presence, so normalization is no
47
+ longer silently skipped when a key exists but all its values are `None` or `""`.
48
+
49
+ ### Changed
50
+
51
+ - `--min-hamming` CLI option default and help text are now derived from the
52
+ `MIN_HAMMING_DISTANCE` constant in `validators.py` to prevent drift.
53
+
54
+ ---
55
+
56
+ ## [0.3.3] - 2026-03-13
57
+
58
+ ### Documentation
59
+ - Add architecture diagram showing full library structure including CLI and SampleSheetMerger
60
+ - Update README with an architecture overview and a legend explaining solid vs. dashed lines
61
+ - Add `[Custom_Sections*]` to V1 and V2 format descriptions
62
+
9
63
  ## [0.3.2] - 2026-03-12
10
64
 
11
65
  ### Added
@@ -1,23 +1,33 @@
1
1
  cff-version: 1.2.0
2
- message: "If you use this software, please cite it as below."
2
+ message: "If you use this software, please cite it using the metadata below."
3
3
  type: software
4
4
  title: "samplesheet-parser"
5
+ abstract: >
6
+ A Python library for parsing, validating, converting, and merging
7
+ Illumina SampleSheet V1 and V2 files for BCLConvert and bcl2fastq.
8
+ Provides format auto-detection, bidirectional V1/V2 conversion,
9
+ structural and index validation, sheet diffing, and cross-project
10
+ merging with Hamming-distance collision detection.
5
11
  version: 0.3.2
6
12
  date-released: 2026-03-12
7
13
  license: Apache-2.0
8
14
  url: "https://github.com/chaitanyakasaraneni/samplesheet-parser"
9
15
  repository-code: "https://github.com/chaitanyakasaraneni/samplesheet-parser"
10
- abstract: >
11
- A Python library for parsing, validating, converting, and merging
12
- Illumina SampleSheet V1 and V2 files for BCLConvert and bcl2fastq.
16
+ repository-artifact: "https://pypi.org/project/samplesheet-parser/"
13
17
  keywords:
14
18
  - bioinformatics
15
19
  - Illumina
16
20
  - SampleSheet
17
21
  - BCLConvert
22
+ - bcl2fastq
18
23
  - demultiplexing
19
24
  - genomics
25
+ - sequencing
20
26
  - Python
27
+ identifiers:
28
+ - type: doi
29
+ value: 10.5281/zenodo.18989694
30
+ description: Concept DOI (all versions)
21
31
  authors:
22
32
  - family-names: Kasaraneni
23
33
  given-names: Chaitanya Krishna
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: samplesheet-parser
3
- Version: 0.3.2
3
+ Version: 0.3.4
4
4
  Summary: Format-agnostic parser for Illumina SampleSheet.csv files — supports IEM V1 and BCLConvert V2
5
5
  Project-URL: Homepage, https://github.com/chaitanyakasaraneni/samplesheet-parser
6
6
  Project-URL: Documentation, https://illumina-samplesheet.readthedocs.io
@@ -60,8 +60,9 @@ Supports both the classic IEM V1 format (bcl2fastq era) and the modern BCLConver
60
60
  [![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-yellow.svg)](https://opensource.org/licenses/Apache-2.0)
61
61
  [![Tests](https://github.com/chaitanyakasaraneni/samplesheet-parser/actions/workflows/ci.yml/badge.svg)](https://github.com/chaitanyakasaraneni/samplesheet-parser/actions)
62
62
  [![codecov](https://codecov.io/gh/chaitanyakasaraneni/samplesheet-parser/branch/main/graph/badge.svg?token=CODECOV_TOKEN)](https://codecov.io/gh/chaitanyakasaraneni/samplesheet-parser)
63
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.18989694.svg)](https://doi.org/10.5281/zenodo.18989694)
63
64
 
64
- ![samplesheet-parser overview](https://raw.githubusercontent.com/chaitanyakasaraneni/samplesheet-parser/main/images/samplesheet_parser_overview.png)
65
+ ![samplesheet-parser overview](https://raw.githubusercontent.com/chaitanyakasaraneni/samplesheet-parser/main/images/samplesheet_parser_arch_v03.png)
65
66
 
66
67
  *`SampleSheetFactory` auto-detects the format and routes to the correct parser. Both formats share a common interface — `SampleSheetConverter` handles bidirectional conversion, `SampleSheetValidator` catches index and adapter issues, `SampleSheetDiff` compares two sheets across any combination of V1/V2 formats, `SampleSheetMerger` combines multiple per-project sheets into one, and `SampleSheetWriter` builds or edits sheets programmatically. The `samplesheet` CLI exposes all of this from the shell.*
67
68
 
@@ -556,6 +557,80 @@ sheet.get_read_structure() # → ReadStructure dataclass
556
557
 
557
558
  ---
558
559
 
560
+ ## CI / pre-commit integration
561
+
562
+ The CLI exits with meaningful codes (`0` = clean, `1` = issues, `2` = error), making it easy to wire into automated pipelines.
563
+
564
+ ### GitHub Actions
565
+
566
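+ Add a workflow that validates the sheet whenever a push or pull request touches a `SampleSheet.csv` (the final step validates the repository-root file; adjust the path if yours lives elsewhere):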
567
+
568
+ ```yaml
569
+ # .github/workflows/validate-samplesheet.yml
570
+ name: Validate SampleSheet
571
+
572
+ on:
573
+ push:
574
+ paths:
575
+ - '**/SampleSheet.csv'
576
+ pull_request:
577
+ paths:
578
+ - '**/SampleSheet.csv'
579
+
580
+ jobs:
581
+ validate:
582
+ runs-on: ubuntu-latest
583
+ steps:
584
+ - uses: actions/checkout@v4
585
+
586
+ - uses: actions/setup-python@v5
587
+ with:
588
+ python-version: '3.12'
589
+
590
+ - run: pip install "samplesheet-parser[cli]"
591
+
592
+ - name: Validate SampleSheet
593
+ run: samplesheet validate SampleSheet.csv --format json
594
+ ```
595
+
596
+ ### pre-commit hook
597
+
598
+ Gate commits that touch any `SampleSheet.csv` in the repository:
599
+
600
+ ```yaml
601
+ # .pre-commit-config.yaml
602
+ repos:
603
+ - repo: local
604
+ hooks:
605
+ - id: samplesheet-validate
606
+ name: Validate SampleSheet.csv
607
+ entry: samplesheet validate
608
+ language: python
609
+ additional_dependencies: ["samplesheet-parser[cli]"]
610
+ files: SampleSheet\.csv$
611
+ pass_filenames: true
612
+ ```
613
+
614
+ Install and run once to verify:
615
+
616
+ ```bash
617
+ pip install pre-commit
618
+ pre-commit install
619
+ pre-commit run samplesheet-validate --all-files
620
+ ```
621
+
622
+ ### Stricter Hamming distance in CI
623
+
624
+ If your lab uses longer indexes (10 bp+), raise the minimum Hamming distance threshold to catch borderline cases earlier:
625
+
626
+ ```bash
627
+ samplesheet validate SampleSheet.csv --min-hamming 4
628
+ ```
629
+
630
+ This is especially useful in CI where you want to prevent runs that will likely fail demultiplexing.
631
+
632
+ ---
633
+
559
634
  ## Contributing
560
635
 
561
636
  ```bash
@@ -584,7 +659,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full local testing guide and PR c
584
659
  title = {samplesheet-parser: Format-agnostic parser for Illumina SampleSheet.csv},
585
660
  year = {2026},
586
661
  url = {https://github.com/chaitanyakasaraneni/samplesheet-parser},
587
- version = {0.3.2}
662
+ version = {0.3.4}
588
663
  }
589
664
  ```
590
665
 
@@ -9,8 +9,9 @@ Supports both the classic IEM V1 format (bcl2fastq era) and the modern BCLConver
9
9
  [![License: Apache 2.0](https://img.shields.io/badge/License-Apache_2.0-yellow.svg)](https://opensource.org/licenses/Apache-2.0)
10
10
  [![Tests](https://github.com/chaitanyakasaraneni/samplesheet-parser/actions/workflows/ci.yml/badge.svg)](https://github.com/chaitanyakasaraneni/samplesheet-parser/actions)
11
11
  [![codecov](https://codecov.io/gh/chaitanyakasaraneni/samplesheet-parser/branch/main/graph/badge.svg?token=CODECOV_TOKEN)](https://codecov.io/gh/chaitanyakasaraneni/samplesheet-parser)
12
+ [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.18989694.svg)](https://doi.org/10.5281/zenodo.18989694)
12
13
 
13
- ![samplesheet-parser overview](https://raw.githubusercontent.com/chaitanyakasaraneni/samplesheet-parser/main/images/samplesheet_parser_overview.png)
14
+ ![samplesheet-parser overview](https://raw.githubusercontent.com/chaitanyakasaraneni/samplesheet-parser/main/images/samplesheet_parser_arch_v03.png)
14
15
 
15
16
  *`SampleSheetFactory` auto-detects the format and routes to the correct parser. Both formats share a common interface — `SampleSheetConverter` handles bidirectional conversion, `SampleSheetValidator` catches index and adapter issues, `SampleSheetDiff` compares two sheets across any combination of V1/V2 formats, `SampleSheetMerger` combines multiple per-project sheets into one, and `SampleSheetWriter` builds or edits sheets programmatically. The `samplesheet` CLI exposes all of this from the shell.*
16
17
 
@@ -505,6 +506,80 @@ sheet.get_read_structure() # → ReadStructure dataclass
505
506
 
506
507
  ---
507
508
 
509
+ ## CI / pre-commit integration
510
+
511
+ The CLI exits with meaningful codes (`0` = clean, `1` = issues, `2` = error), making it easy to wire into automated pipelines.
512
+
513
+ ### GitHub Actions
514
+
515
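+ Add a workflow that validates the sheet whenever a push or pull request touches a `SampleSheet.csv` (the final step validates the repository-root file; adjust the path if yours lives elsewhere):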
516
+
517
+ ```yaml
518
+ # .github/workflows/validate-samplesheet.yml
519
+ name: Validate SampleSheet
520
+
521
+ on:
522
+ push:
523
+ paths:
524
+ - '**/SampleSheet.csv'
525
+ pull_request:
526
+ paths:
527
+ - '**/SampleSheet.csv'
528
+
529
+ jobs:
530
+ validate:
531
+ runs-on: ubuntu-latest
532
+ steps:
533
+ - uses: actions/checkout@v4
534
+
535
+ - uses: actions/setup-python@v5
536
+ with:
537
+ python-version: '3.12'
538
+
539
+ - run: pip install "samplesheet-parser[cli]"
540
+
541
+ - name: Validate SampleSheet
542
+ run: samplesheet validate SampleSheet.csv --format json
543
+ ```
544
+
545
+ ### pre-commit hook
546
+
547
+ Gate commits that touch any `SampleSheet.csv` in the repository:
548
+
549
+ ```yaml
550
+ # .pre-commit-config.yaml
551
+ repos:
552
+ - repo: local
553
+ hooks:
554
+ - id: samplesheet-validate
555
+ name: Validate SampleSheet.csv
556
+ entry: samplesheet validate
557
+ language: python
558
+ additional_dependencies: ["samplesheet-parser[cli]"]
559
+ files: SampleSheet\.csv$
560
+ pass_filenames: true
561
+ ```
562
+
563
+ Install and run once to verify:
564
+
565
+ ```bash
566
+ pip install pre-commit
567
+ pre-commit install
568
+ pre-commit run samplesheet-validate --all-files
569
+ ```
570
+
571
+ ### Stricter Hamming distance in CI
572
+
573
+ If your lab uses longer indexes (10 bp+), raise the minimum Hamming distance threshold to catch borderline cases earlier:
574
+
575
+ ```bash
576
+ samplesheet validate SampleSheet.csv --min-hamming 4
577
+ ```
578
+
579
+ This is especially useful in CI where you want to prevent runs that will likely fail demultiplexing.
580
+
581
+ ---
582
+
508
583
  ## Contributing
509
584
 
510
585
  ```bash
@@ -533,7 +608,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full local testing guide and PR c
533
608
  title = {samplesheet-parser: Format-agnostic parser for Illumina SampleSheet.csv},
534
609
  year = {2026},
535
610
  url = {https://github.com/chaitanyakasaraneni/samplesheet-parser},
536
- version = {0.3.2}
611
+ version = {0.3.4}
537
612
  }
538
613
  ```
539
614
 
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "samplesheet-parser"
7
- version = "0.3.2"
7
+ version = "0.3.4"
8
8
  description = "Format-agnostic parser for Illumina SampleSheet.csv files — supports IEM V1 and BCLConvert V2"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -36,6 +36,7 @@ from samplesheet_parser.converter import SampleSheetConverter
36
36
  from samplesheet_parser.diff import DiffResult, SampleSheetDiff
37
37
  from samplesheet_parser.enums import IndexType, SampleSheetVersion
38
38
  from samplesheet_parser.factory import SampleSheetFactory
39
+ from samplesheet_parser.index_utils import normalize_index_lengths
39
40
  from samplesheet_parser.merger import MergeResult, SampleSheetMerger
40
41
  from samplesheet_parser.parsers.v1 import SampleSheetV1
41
42
  from samplesheet_parser.parsers.v2 import SampleSheetV2
@@ -56,5 +57,6 @@ __all__ = [
56
57
  "SampleSheetWriter",
57
58
  "SampleSheetMerger",
58
59
  "MergeResult",
60
+ "normalize_index_lengths",
59
61
  "__version__",
60
62
  ]
@@ -5,6 +5,7 @@ Entry point: ``samplesheet`` (configured in ``pyproject.toml``).
5
5
 
6
6
  Commands
7
7
  --------
8
+ info Show a quick summary of a sample sheet.
8
9
  validate Validate a sheet — exit 0 if clean, exit 1 if errors.
9
10
  convert Convert between V1 and V2 formats.
10
11
  diff Diff two sheets — exit 1 if changes detected.
@@ -20,8 +21,12 @@ Usage
20
21
  -----
21
22
  ::
22
23
 
24
+ samplesheet info SampleSheet.csv
25
+ samplesheet info SampleSheet.csv --format json
26
+
23
27
  samplesheet validate SampleSheet.csv
24
28
  samplesheet validate SampleSheet.csv --format json
29
+ samplesheet validate SampleSheet.csv --min-hamming 4
25
30
 
26
31
  samplesheet convert SampleSheet_v1.csv --to v2 --output SampleSheet_v2.csv
27
32
  samplesheet convert SampleSheet_v2.csv --to v1 --output SampleSheet_v1.csv
@@ -50,6 +55,7 @@ except ImportError: # pragma: no cover
50
55
  _TYPER_AVAILABLE = False
51
56
 
52
57
  from samplesheet_parser.enums import SampleSheetVersion
58
+ from samplesheet_parser.validators import MIN_HAMMING_DISTANCE as _MIN_HAMMING_DEFAULT
53
59
 
54
60
  if _TYPER_AVAILABLE:
55
61
  app = typer.Typer(
@@ -115,6 +121,87 @@ if _TYPER_AVAILABLE:
115
121
  typer.echo(f"Error: unknown format '{fmt}'. Use 'text' or 'json'.", err=True)
116
122
  raise typer.Exit(code=2)
117
123
 
124
+ # ---------------------------------------------------------------------------
125
+ # info
126
+ # ---------------------------------------------------------------------------
127
+
128
+ @app.command()
129
+ def info(
130
+ path: Annotated[Path, typer.Argument(help="Path to SampleSheet.csv.", metavar="FILE")],
131
+ fmt: _FormatOption = "text",
132
+ ) -> None:
133
+ """Display a quick summary of a sample sheet without full validation.
134
+
135
+ Shows format version, sample count, lanes, index type, read lengths,
136
+ and adapter sequences at a glance.
137
+
138
+ Exits 0 on success, 2 on unreadable files.
139
+ """
140
+ from samplesheet_parser.factory import SampleSheetFactory
141
+ from samplesheet_parser.parsers.v1 import SampleSheetV1
142
+
143
+ _validate_fmt(fmt)
144
+ if not path.exists():
145
+ typer.echo(f"Error: file not found: {path}", err=True)
146
+ raise typer.Exit(code=2)
147
+
148
+ try:
149
+ factory = SampleSheetFactory()
150
+ sheet = factory.create_parser(str(path), parse=True, clean=False)
151
+ except Exception as exc:
152
+ typer.echo(f"Error: could not parse {path}: {exc}", err=True)
153
+ raise typer.Exit(code=2) from exc
154
+
155
+ if factory.version is None: # pragma: no cover
156
+ raise RuntimeError("SampleSheetFactory.version must be set after create_parser")
157
+
158
+ samples = sheet.samples()
159
+ lanes = sorted({str(s.get("lane") or "") for s in samples} - {""}) or ["(none)"]
160
+ index_type = sheet.index_type()
161
+ adapters: list[str] = getattr(sheet, "adapters", []) or []
162
+ experiment_name: str | None = getattr(sheet, "experiment_name", None)
163
+
164
+ if isinstance(sheet, SampleSheetV1):
165
+ read_lengths = [str(r) for r in (sheet.read_lengths or [])]
166
+ instrument = sheet.instrument_type
167
+ else:
168
+ reads_dict = sheet.reads or {}
169
+ read_lengths = [
170
+ str(reads_dict[k])
171
+ for k in ("Read1Cycles", "Read2Cycles")
172
+ if k in reads_dict
173
+ ]
174
+ instrument = sheet.instrument_platform
175
+
176
+ if fmt == "json":
177
+ _print_json({
178
+ "file": str(path),
179
+ "format": factory.version.value,
180
+ "sample_count": len(samples),
181
+ "lanes": lanes,
182
+ "index_type": index_type,
183
+ "read_lengths": read_lengths,
184
+ "adapters": adapters,
185
+ "experiment_name": experiment_name,
186
+ "instrument": instrument,
187
+ })
188
+ else:
189
+ typer.echo(f"File: {path}")
190
+ typer.echo(f"Format: {factory.version.value}")
191
+ typer.echo(f"Samples: {len(samples)}")
192
+ typer.echo(f"Lanes: {', '.join(lanes)}")
193
+ typer.echo(f"Index type: {index_type}")
194
+ typer.echo(
195
+ f"Read lengths: {' + '.join(read_lengths) if read_lengths else '(not set)'}"
196
+ )
197
+ typer.echo(f"Adapters: {', '.join(adapters) if adapters else '(none)'}")
198
+ if experiment_name:
199
+ typer.echo(f"Experiment: {experiment_name}")
200
+ if instrument:
201
+ typer.echo(f"Instrument: {instrument}")
202
+
203
+ raise typer.Exit(code=0)
204
+
118
205
  # ---------------------------------------------------------------------------
119
206
  # validate
120
207
  # ---------------------------------------------------------------------------
@@ -123,6 +210,17 @@ if _TYPER_AVAILABLE:
123
210
  def validate(
124
211
  path: Annotated[Path, typer.Argument(help="Path to SampleSheet.csv.", metavar="FILE")],
125
212
  fmt: _FormatOption = "text",
213
+ min_hamming: Annotated[
214
+ int,
215
+ typer.Option(
216
+ "--min-hamming",
217
+ help=(
218
+ f"Minimum Hamming distance between indexes "
219
+ f"(default: {_MIN_HAMMING_DEFAULT}, must be >= 1)."
220
+ ),
221
+ metavar="N",
222
+ ),
223
+ ] = _MIN_HAMMING_DEFAULT,
126
224
  ) -> None:
127
225
  """Validate a sample sheet for index, adapter, and structural issues.
128
226
 
@@ -134,6 +232,11 @@ if _TYPER_AVAILABLE:
134
232
  from samplesheet_parser.validators import SampleSheetValidator
135
233
 
136
234
  _validate_fmt(fmt)
235
+ if min_hamming < 1:
236
+ typer.echo(
237
+ f"Error: --min-hamming must be >= 1, got {min_hamming}.", err=True
238
+ )
239
+ raise typer.Exit(code=2)
137
240
  if not path.exists():
138
241
  typer.echo(f"Error: file not found: {path}", err=True)
139
242
  raise typer.Exit(code=2)
@@ -149,13 +252,14 @@ if _TYPER_AVAILABLE:
149
252
  raise RuntimeError("SampleSheetFactory.version must be set after create_parser")
150
253
  version = factory.version
151
254
 
152
- result = SampleSheetValidator().validate(sheet)
255
+ result = SampleSheetValidator().validate(sheet, min_hamming_distance=min_hamming)
153
256
 
154
257
  if fmt == "json":
155
258
  _print_json({
156
259
  "file": str(path),
157
260
  "version": version.value,
158
261
  "is_valid": result.is_valid,
262
+ "min_hamming_distance": min_hamming,
159
263
  "errors": [
160
264
  {"code": e.code, "message": e.message, "context": e.context}
161
265
  for e in result.errors