samplesheet-parser 0.3.0__tar.gz → 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. samplesheet_parser-0.3.2/.zenodo.json +30 -0
  2. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/CHANGELOG.md +76 -0
  3. samplesheet_parser-0.3.2/CITATION.cff +25 -0
  4. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/PKG-INFO +2 -2
  5. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/README.md +1 -1
  6. samplesheet_parser-0.3.2/examples/demo_merger.py +130 -0
  7. samplesheet_parser-0.3.2/examples/sample_sheets/ProjectAlpha_SampleSheet.csv +25 -0
  8. samplesheet_parser-0.3.2/examples/sample_sheets/ProjectBeta_SampleSheet.csv +25 -0
  9. samplesheet_parser-0.3.2/examples/sample_sheets/ProjectDelta_SampleSheet_collision.csv +24 -0
  10. samplesheet_parser-0.3.2/examples/sample_sheets/ProjectGamma_SampleSheet.csv +21 -0
  11. samplesheet_parser-0.3.2/examples/sample_sheets/combined_clean.csv +20 -0
  12. samplesheet_parser-0.3.2/examples/sample_sheets/combined_collision_forced.csv +19 -0
  13. samplesheet_parser-0.3.2/examples/sample_sheets/combined_mixed_formats.csv +23 -0
  14. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/pyproject.toml +1 -1
  15. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/merger.py +39 -1
  16. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/.github/workflows/ci.yml +0 -0
  17. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/.github/workflows/copilot-instructions.md +0 -0
  18. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/.gitignore +0 -0
  19. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/CONTRIBUTING.md +0 -0
  20. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/LICENSE +0 -0
  21. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/parse_examples.py +0 -0
  22. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/README.md +0 -0
  23. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_dual_index.csv +0 -0
  24. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_multi_lane.csv +0 -0
  25. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_single_index.csv +0 -0
  26. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_with_lab_qc_settings.csv +0 -0
  27. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v1_with_manifests.csv +0 -0
  28. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_nextseq_single_index.csv +0 -0
  29. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_novaseq_x_dual_index.csv +0 -0
  30. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_with_cloud_settings.csv +0 -0
  31. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_with_index_umi.csv +0 -0
  32. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_with_pipeline_settings.csv +0 -0
  33. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/examples/sample_sheets/v2_with_read_umi.csv +0 -0
  34. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/images/samplesheet_parser_overview.png +0 -0
  35. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/__init__.py +0 -0
  36. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/cli.py +0 -0
  37. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/converter.py +0 -0
  38. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/diff.py +0 -0
  39. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/enums.py +0 -0
  40. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/factory.py +0 -0
  41. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/parsers/__init__.py +0 -0
  42. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/parsers/v1.py +0 -0
  43. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/parsers/v2.py +0 -0
  44. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/validators.py +0 -0
  45. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/samplesheet_parser/writer.py +0 -0
  46. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/scripts/demo_converter.py +0 -0
  47. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/scripts/demo_diff.py +0 -0
  48. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/scripts/demo_writer.py +0 -0
  49. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/__init__.py +0 -0
  50. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/conftest.py +0 -0
  51. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/fixtures/SampleSheet_v1_dual_index.csv +0 -0
  52. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/fixtures/SampleSheet_v2_dual_index.csv +0 -0
  53. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/fixtures/SampleSheet_v2_modified.csv +0 -0
  54. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_cli.py +0 -0
  55. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_converter.py +0 -0
  56. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_diff.py +0 -0
  57. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_factory.py +0 -0
  58. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_merger.py +0 -0
  59. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_parsers/__init__.py +0 -0
  60. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_parsers/test_v1.py +0 -0
  61. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_parsers/test_v2.py +0 -0
  62. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_validators/__init__.py +0 -0
  63. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_validators/test_hamming.py +0 -0
  64. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_validators/test_validators.py +0 -0
  65. {samplesheet_parser-0.3.0 → samplesheet_parser-0.3.2}/tests/test_writer.py +0 -0
@@ -0,0 +1,30 @@
1
+ {
2
+ "title": "samplesheet-parser: A Python library for parsing, validating, converting, and merging Illumina SampleSheet V1 and V2 files",
3
+ "description": "Open-source Python library providing format auto-detection, bidirectional V1/V2 conversion, structural and index validation, sheet diffing, and cross-project merging for Illumina SampleSheet files.",
4
+ "upload_type": "software",
5
+ "access_right": "open",
6
+ "license": "Apache-2.0",
7
+ "version": "0.3.1",
8
+ "keywords": [
9
+ "bioinformatics", "Illumina", "SampleSheet", "BCLConvert",
10
+ "demultiplexing", "genomics", "sequencing", "Python"
11
+ ],
12
+ "creators": [
13
+ {
14
+ "name": "Kasaraneni, Chaitanya Krishna",
15
+ "orcid": "0000-0001-5792-1095"
16
+ }
17
+ ],
18
+ "related_identifiers": [
19
+ {
20
+ "identifier": "https://github.com/chaitanyakasaraneni/samplesheet-parser",
21
+ "relation": "isSupplementTo",
22
+ "scheme": "url"
23
+ },
24
+ {
25
+ "identifier": "https://pypi.org/project/samplesheet-parser/",
26
+ "relation": "isIdenticalTo",
27
+ "scheme": "url"
28
+ }
29
+ ]
30
+ }
@@ -6,6 +6,82 @@ The format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6
6
 
7
7
  ---
8
8
 
9
+ ## [0.3.2] - 2026-03-12
10
+
11
+ ### Added
12
+ - `.zenodo.json` metadata file for automatic Zenodo archival and DOI
13
+ minting on GitHub releases
14
+ - `CITATION.cff` file enabling GitHub's "Cite this repository" button
15
+ and standardized software citation for downstream users
16
+
17
+ ## [0.3.1] - 2026-03-11
18
+
19
+ ### Fixed
20
+
21
+ - **`SampleSheetMerger`** — `INDEX_DISTANCE_TOO_LOW` and `DUPLICATE_INDEX`
22
+ were reported twice in `--force` merges (once by the pre-merge cross-sheet
23
+ check, once by the post-merge validator). Duplicate codes are now suppressed
24
+ in `_validate_merged` — the more descriptive pre-merge message is always
25
+ preferred.
26
+
27
+ ## [0.3.0] - 2026-03-10
28
+
29
+ ### Added
30
+
31
+ - **`SampleSheetMerger`** — combines multiple per-project sample sheets into a
32
+ single sheet for a flow cell run.
33
+ - `add(path)` — register an input sheet (V1 or V2); mixed formats are
34
+ auto-converted to the target version before merging.
35
+ - `merge(output_path, validate=True, abort_on_conflicts=True)` — merges all
36
+ registered sheets, writes the combined output, and returns a `MergeResult`.
37
+ - **Index collision detection** — raises a conflict when two samples share
38
+ the same lane and index sequence across project boundaries.
39
+ - **Hamming distance check** — warns when the combined I7+I5 distance between
40
+ any two samples across sheets falls below 3.
41
+ - **Read-length conflict detection** — raises a conflict when registered
42
+ sheets specify incompatible `Read1Cycles`/`Read2Cycles` (V2) or `[Reads]`
43
+ lengths (V1).
44
+ - **Adapter conflict detection** — warns when adapter sequences differ across
45
+ sheets.
46
+ - **Mixed-format warning** — emits a warning when V1 and V2 sheets are
47
+ combined, with the auto-conversion strategy logged.
48
+ - `MergeResult` dataclass — exposes `conflicts`, `warnings`, `sample_count`,
49
+ `source_versions`, `output_path`, `has_conflicts`, and `summary()`;
50
+ consistent with `ValidationResult` and `DiffResult`.
51
+ - `abort_on_conflicts=True` (default) — skips writing the output file when
52
+ any conflict is present; set `False` (via `--force` in the CLI) to write
53
+ despite conflicts.
54
+ - `SampleSheetMerger` and `MergeResult` are exported from the top-level
55
+ package.
56
+
57
+ - **`samplesheet` CLI** — command-line interface exposing the four core
58
+ operations, available as an optional extra (`pip install
59
+ "samplesheet-parser[cli]"`; adds `typer` as a dependency).
60
+ - `samplesheet validate <file>` — exits 0 if clean, 1 if errors, 2 on
61
+ usage/parse errors. Supports `--format json` for machine-readable output.
62
+ - `samplesheet convert <file> --to <v1|v2> --output <path>` — converts
63
+ between formats; exits 0 on success, 1 on conversion error, 2 on bad
64
+ arguments.
65
+ - `samplesheet diff <old> <new>` — exits 0 if identical, 1 if differences
66
+ detected (useful in CI pre-run checks). Supports `--format json`.
67
+ - `samplesheet merge <files...> --output <path>` — merges two or more sheets;
68
+ exits 0 on clean merge, 1 on conflicts or warnings, 2 on bad arguments.
69
+ Supports `--force`, `--to <v1|v2>`, and `--format json`.
70
+ - All commands print errors to stderr and structured data to stdout.
71
+ - Entry point configured in `pyproject.toml`:
72
+ `samplesheet = "samplesheet_parser.cli:main"`.
73
+ - Module imports cleanly without `typer` installed — missing-extra error is
74
+ surfaced only at invocation time.
75
+
76
+ ### Changed
77
+
78
+ - README updated to document `SampleSheetMerger`, the `samplesheet` CLI, all
79
+ new API reference tables, and installation instructions for the `[cli]` extra.
80
+ - `CONTRIBUTING.md` updated with CLI testing instructions and the new
81
+ `[dev,cli]` install target.
82
+
83
+ ---
84
+
9
85
  ## [0.2.0] - 2026-02-25
10
86
 
11
87
  ### Added
@@ -0,0 +1,25 @@
1
+ cff-version: 1.2.0
2
+ message: "If you use this software, please cite it as below."
3
+ type: software
4
+ title: "samplesheet-parser"
5
+ version: 0.3.2
6
+ date-released: 2026-03-12
7
+ license: Apache-2.0
8
+ url: "https://github.com/chaitanyakasaraneni/samplesheet-parser"
9
+ repository-code: "https://github.com/chaitanyakasaraneni/samplesheet-parser"
10
+ abstract: >
11
+ A Python library for parsing, validating, converting, and merging
12
+ Illumina SampleSheet V1 and V2 files for BCLConvert and bcl2fastq.
13
+ keywords:
14
+ - bioinformatics
15
+ - Illumina
16
+ - SampleSheet
17
+ - BCLConvert
18
+ - demultiplexing
19
+ - genomics
20
+ - Python
21
+ authors:
22
+ - family-names: Kasaraneni
23
+ given-names: Chaitanya Krishna
24
+ email: kc.kasaraneni@gmail.com
25
+ orcid: "https://orcid.org/0000-0001-5792-1095"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: samplesheet-parser
3
- Version: 0.3.0
3
+ Version: 0.3.2
4
4
  Summary: Format-agnostic parser for Illumina SampleSheet.csv files — supports IEM V1 and BCLConvert V2
5
5
  Project-URL: Homepage, https://github.com/chaitanyakasaraneni/samplesheet-parser
6
6
  Project-URL: Documentation, https://illumina-samplesheet.readthedocs.io
@@ -584,7 +584,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full local testing guide and PR c
584
584
  title = {samplesheet-parser: Format-agnostic parser for Illumina SampleSheet.csv},
585
585
  year = {2026},
586
586
  url = {https://github.com/chaitanyakasaraneni/samplesheet-parser},
587
- version = {0.3.0}
587
+ version = {0.3.2}
588
588
  }
589
589
  ```
590
590
 
@@ -533,7 +533,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for the full local testing guide and PR c
533
533
  title = {samplesheet-parser: Format-agnostic parser for Illumina SampleSheet.csv},
534
534
  year = {2026},
535
535
  url = {https://github.com/chaitanyakasaraneni/samplesheet-parser},
536
- version = {0.3.0}
536
+ version = {0.3.2}
537
537
  }
538
538
  ```
539
539
 
@@ -0,0 +1,130 @@
1
+ """
2
+ demo_merger.py — SampleSheetMerger usage examples.
3
+
4
+ Demonstrates three scenarios:
5
+
6
+ 1. Clean merge — two V1 sheets, no conflicts, output written.
7
+ 2. Mixed formats — V1 + V1 + V2, auto-converted to a target version.
8
+ 3. Conflict handling — index collision detected; abort vs. force write.
9
+
10
+ Run from the repo root::
11
+
12
+ python examples/demo_merger.py
13
+
14
+ Sample sheets used are in examples/sample_sheets/.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from pathlib import Path
20
+
21
+ from samplesheet_parser import MergeResult, SampleSheetMerger
22
+ from samplesheet_parser.enums import SampleSheetVersion
23
+
24
+ SHEETS = Path(__file__).parent / "sample_sheets"
25
+
26
+
27
+ def print_result(result: MergeResult) -> None:
28
+ print(f" Summary : {result.summary()}")
29
+ print(f" Sample count : {result.sample_count}")
30
+ print(f" Has conflicts: {result.has_conflicts}")
31
+ print(f" Output path : {result.output_path}")
32
+ if result.warnings:
33
+ print(" Warnings:")
34
+ for w in result.warnings:
35
+ print(f" [{w.code}] {w.message}")
36
+ if result.conflicts:
37
+ print(" Conflicts:")
38
+ for c in result.conflicts:
39
+ print(f" [{c.code}] {c.message}")
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # Scenario 1: Clean merge — two V1 sheets → combined V2
44
+ # ---------------------------------------------------------------------------
45
+
46
+ print("=" * 60)
47
+ print("Scenario 1: Clean merge (V1 + V1 → V2)")
48
+ print("=" * 60)
49
+
50
+ merger = SampleSheetMerger(target_version=SampleSheetVersion.V2)
51
+ merger.add(SHEETS / "ProjectAlpha_SampleSheet.csv")
52
+ merger.add(SHEETS / "ProjectBeta_SampleSheet.csv")
53
+
54
+ result = merger.merge(
55
+ SHEETS / "combined_clean.csv",
56
+ validate=True,
57
+ abort_on_conflicts=True,
58
+ )
59
+
60
+ print_result(result)
61
+ print()
62
+
63
+ # ---------------------------------------------------------------------------
64
+ # Scenario 2: Mixed formats — V1 + V1 + V2 → combined V2
65
+ # ProjectGamma supplies a V2 sheet; merger auto-converts all inputs to V2.
66
+ # Mixed-format inputs produce a warning but are not a conflict.
67
+ # ---------------------------------------------------------------------------
68
+
69
+ print("=" * 60)
70
+ print("Scenario 2: Mixed formats (V1 + V1 + V2 → V2)")
71
+ print("=" * 60)
72
+
73
+ merger = SampleSheetMerger(target_version=SampleSheetVersion.V2)
74
+ merger.add(SHEETS / "ProjectAlpha_SampleSheet.csv")
75
+ merger.add(SHEETS / "ProjectBeta_SampleSheet.csv")
76
+ merger.add(SHEETS / "ProjectGamma_SampleSheet.csv")
77
+
78
+ result = merger.merge(
79
+ SHEETS / "combined_mixed_formats.csv",
80
+ validate=True,
81
+ abort_on_conflicts=True,
82
+ )
83
+
84
+ print_result(result)
85
+ print()
86
+
87
+ # ---------------------------------------------------------------------------
88
+ # Scenario 3a: Index collision — abort_on_conflicts=True (default)
89
+ # ProjectDelta reuses an index from ProjectAlpha on the same lane.
90
+ # The output file is NOT written; result.output_path is None.
91
+ # ---------------------------------------------------------------------------
92
+
93
+ print("=" * 60)
94
+ print("Scenario 3a: Index collision — abort (default)")
95
+ print("=" * 60)
96
+
97
+ merger = SampleSheetMerger(target_version=SampleSheetVersion.V2)
98
+ merger.add(SHEETS / "ProjectAlpha_SampleSheet.csv")
99
+ merger.add(SHEETS / "ProjectDelta_SampleSheet_collision.csv")
100
+
101
+ result = merger.merge(
102
+ SHEETS / "combined_collision_aborted.csv",
103
+ validate=True,
104
+ abort_on_conflicts=True, # default — file not written on conflict
105
+ )
106
+
107
+ print_result(result)
108
+ print()
109
+
110
+ # ---------------------------------------------------------------------------
111
+ # Scenario 3b: Index collision — abort_on_conflicts=False (force write)
112
+ # Same collision as above, but the file is written anyway.
113
+ # Equivalent to passing --force on the CLI.
114
+ # ---------------------------------------------------------------------------
115
+
116
+ print("=" * 60)
117
+ print("Scenario 3b: Index collision — force write")
118
+ print("=" * 60)
119
+
120
+ merger = SampleSheetMerger(target_version=SampleSheetVersion.V2)
121
+ merger.add(SHEETS / "ProjectAlpha_SampleSheet.csv")
122
+ merger.add(SHEETS / "ProjectDelta_SampleSheet_collision.csv")
123
+
124
+ result = merger.merge(
125
+ SHEETS / "combined_collision_forced.csv",
126
+ validate=True,
127
+ abort_on_conflicts=False, # write despite conflicts
128
+ )
129
+
130
+ print_result(result)
@@ -0,0 +1,25 @@
1
+ [Header]
2
+ IEMFileVersion,5
3
+ Experiment Name,240115_A01234_0042_AHJLG7DRXX
4
+ Date,2024-01-15
5
+ Workflow,GenerateFASTQ
6
+ Application,FASTQ Only
7
+ Instrument Type,NovaSeq 6000
8
+ Assay,TruSeq DNA PCR-Free
9
+ Index Adapters,TruSeq DNA UD Indexes (96 Indexes)
10
+ Chemistry,Amplicon
11
+
12
+ [Reads]
13
+ 151
14
+ 151
15
+
16
+ [Settings]
17
+ ReverseComplement,0
18
+ Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
19
+ AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
20
+
21
+ [Data]
22
+ Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
23
+ 1,AlphaSample1,AlphaSample1,,A01,UDP0001,CAAGACAGAT,UDP0001,ACTATAGCCT,ProjectAlpha,WGS_batch1
24
+ 1,AlphaSample2,AlphaSample2,,B01,UDP0002,TGAACCTGAT,UDP0002,TGATACGTCC,ProjectAlpha,WGS_batch1
25
+ 1,AlphaSample3,AlphaSample3,,C01,UDP0003,GCACAACGTT,UDP0003,CATCTCACAG,ProjectAlpha,WGS_batch1
@@ -0,0 +1,25 @@
1
+ [Header]
2
+ IEMFileVersion,5
3
+ Experiment Name,240115_A01234_0042_AHJLG7DRXX
4
+ Date,2024-01-15
5
+ Workflow,GenerateFASTQ
6
+ Application,FASTQ Only
7
+ Instrument Type,NovaSeq 6000
8
+ Assay,TruSeq DNA PCR-Free
9
+ Index Adapters,TruSeq DNA UD Indexes (96 Indexes)
10
+ Chemistry,Amplicon
11
+
12
+ [Reads]
13
+ 151
14
+ 151
15
+
16
+ [Settings]
17
+ ReverseComplement,0
18
+ Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
19
+ AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
20
+
21
+ [Data]
22
+ Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
23
+ 1,BetaSample1,BetaSample1,,D01,UDP0004,ATCGCCTGTT,UDP0004,GACTAGCATG,ProjectBeta,WGS_batch2
24
+ 1,BetaSample2,BetaSample2,,E01,UDP0005,CTTGTAGCAA,UDP0005,TGCGTCAGCC,ProjectBeta,WGS_batch2
25
+ 1,BetaSample3,BetaSample3,,F01,UDP0006,GCGCGATGTT,UDP0006,CTATGCCGGT,ProjectBeta,WGS_batch2
@@ -0,0 +1,24 @@
1
+ [Header]
2
+ IEMFileVersion,5
3
+ Experiment Name,240115_A01234_0042_AHJLG7DRXX
4
+ Date,2024-01-15
5
+ Workflow,GenerateFASTQ
6
+ Application,FASTQ Only
7
+ Instrument Type,NovaSeq 6000
8
+ Assay,TruSeq DNA PCR-Free
9
+ Index Adapters,TruSeq DNA UD Indexes (96 Indexes)
10
+ Chemistry,Amplicon
11
+
12
+ [Reads]
13
+ 151
14
+ 151
15
+
16
+ [Settings]
17
+ ReverseComplement,0
18
+ Adapter,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
19
+ AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
20
+
21
+ [Data]
22
+ Lane,Sample_ID,Sample_Name,Sample_Plate,Sample_Well,I7_Index_ID,index,I5_Index_ID,index2,Sample_Project,Description
23
+ 1,DeltaSample1,DeltaSample1,,G01,UDP0001,CAAGACAGAT,UDP0001,ACTATAGCCT,ProjectDelta,WGS_batch3
24
+ 1,DeltaSample2,DeltaSample2,,H01,UDP0009,AGTAAGCCGT,UDP0009,TTCCTGCAGT,ProjectDelta,WGS_batch3
@@ -0,0 +1,21 @@
1
+ [Header]
2
+ FileFormatVersion,2
3
+ RunName,240115_A01234_0042_AHJLG7DRXX
4
+ InstrumentPlatform,NovaSeqXSeries
5
+
6
+ [Reads]
7
+ Read1Cycles,151
8
+ Read2Cycles,151
9
+ Index1Cycles,10
10
+ Index2Cycles,10
11
+
12
+ [BCLConvert_Settings]
13
+ SoftwareVersion,3.9.3
14
+ AdapterRead1,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
15
+ AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
16
+
17
+ [BCLConvert_Data]
18
+ Lane,Sample_ID,Sample_Name,Index,Index2,Sample_Project
19
+ 1,GammaSample1,GammaSample1,TCCGCGAAGT,TCAGCCTTGT,ProjectGamma
20
+ 1,GammaSample2,GammaSample2,AATGTTGCGT,GCAGCCTATT,ProjectGamma
21
+ 1,GammaSample3,GammaSample3,GTTCCGTGAT,AGTCCTAGGT,ProjectGamma
@@ -0,0 +1,20 @@
1
+ [Header]
2
+ FileFormatVersion,2
3
+ RunName,240115_A01234_0042_AHJLG7DRXX
4
+
5
+ [Reads]
6
+ Read1Cycles,151
7
+ Read2Cycles,151
8
+
9
+ [BCLConvert_Settings]
10
+ AdapterRead1,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
11
+ AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
12
+
13
+ [BCLConvert_Data]
14
+ Lane,Sample_ID,Index,Index2,Sample_Project
15
+ 1,AlphaSample1,CAAGACAGAT,ACTATAGCCT,ProjectAlpha
16
+ 1,AlphaSample2,TGAACCTGAT,TGATACGTCC,ProjectAlpha
17
+ 1,AlphaSample3,GCACAACGTT,CATCTCACAG,ProjectAlpha
18
+ 1,BetaSample1,ATCGCCTGTT,GACTAGCATG,ProjectBeta
19
+ 1,BetaSample2,CTTGTAGCAA,TGCGTCAGCC,ProjectBeta
20
+ 1,BetaSample3,GCGCGATGTT,CTATGCCGGT,ProjectBeta
@@ -0,0 +1,19 @@
1
+ [Header]
2
+ FileFormatVersion,2
3
+ RunName,240115_A01234_0042_AHJLG7DRXX
4
+
5
+ [Reads]
6
+ Read1Cycles,151
7
+ Read2Cycles,151
8
+
9
+ [BCLConvert_Settings]
10
+ AdapterRead1,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
11
+ AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
12
+
13
+ [BCLConvert_Data]
14
+ Lane,Sample_ID,Index,Index2,Sample_Project
15
+ 1,AlphaSample1,CAAGACAGAT,ACTATAGCCT,ProjectAlpha
16
+ 1,AlphaSample2,TGAACCTGAT,TGATACGTCC,ProjectAlpha
17
+ 1,AlphaSample3,GCACAACGTT,CATCTCACAG,ProjectAlpha
18
+ 1,DeltaSample1,CAAGACAGAT,ACTATAGCCT,ProjectDelta
19
+ 1,DeltaSample2,AGTAAGCCGT,TTCCTGCAGT,ProjectDelta
@@ -0,0 +1,23 @@
1
+ [Header]
2
+ FileFormatVersion,2
3
+ RunName,240115_A01234_0042_AHJLG7DRXX
4
+
5
+ [Reads]
6
+ Read1Cycles,151
7
+ Read2Cycles,151
8
+
9
+ [BCLConvert_Settings]
10
+ AdapterRead1,AGATCGGAAGAGCACACGTCTGAACTCCAGTCA
11
+ AdapterRead2,AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT
12
+
13
+ [BCLConvert_Data]
14
+ Lane,Sample_ID,Index,Index2,Sample_Project
15
+ 1,AlphaSample1,CAAGACAGAT,ACTATAGCCT,ProjectAlpha
16
+ 1,AlphaSample2,TGAACCTGAT,TGATACGTCC,ProjectAlpha
17
+ 1,AlphaSample3,GCACAACGTT,CATCTCACAG,ProjectAlpha
18
+ 1,BetaSample1,ATCGCCTGTT,GACTAGCATG,ProjectBeta
19
+ 1,BetaSample2,CTTGTAGCAA,TGCGTCAGCC,ProjectBeta
20
+ 1,BetaSample3,GCGCGATGTT,CTATGCCGGT,ProjectBeta
21
+ 1,GammaSample1,TCCGCGAAGT,TCAGCCTTGT,ProjectGamma
22
+ 1,GammaSample2,AATGTTGCGT,GCAGCCTATT,ProjectGamma
23
+ 1,GammaSample3,GTTCCGTGAT,AGTCCTAGGT,ProjectGamma
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "samplesheet-parser"
7
- version = "0.3.0"
7
+ version = "0.3.2"
8
8
  description = "Format-agnostic parser for Illumina SampleSheet.csv files — supports IEM V1 and BCLConvert V2"
9
9
  readme = "README.md"
10
10
  license = { file = "LICENSE" }
@@ -122,6 +122,24 @@ class MergeResult:
122
122
  )
123
123
 
124
124
 
125
+ # ---------------------------------------------------------------------------
126
+ # Codes emitted by the cross-sheet pre-merge checks.
127
+ # These are deduplicated against post-merge validator output in
128
+ # _validate_merged() so the same issue is never reported twice.
129
+ # ---------------------------------------------------------------------------
130
+
131
+ # Validator codes that are fully covered by a pre-merge cross-sheet check
132
+ # and must be suppressed in _validate_merged() to avoid duplicate reporting.
133
+ _PRE_MERGE_CONFLICT_CODES: frozenset[str] = frozenset({
134
+ "INDEX_COLLISION", # covered by _check_index_collisions
135
+ "DUPLICATE_INDEX", # same root cause — validator sees result of collision
136
+ })
137
+
138
+ _PRE_MERGE_WARNING_CODES: frozenset[str] = frozenset({
139
+ "INDEX_DISTANCE_TOO_LOW", # covered by _check_index_distances
140
+ })
141
+
142
+
125
143
  # ---------------------------------------------------------------------------
126
144
  # Merger
127
145
  # ---------------------------------------------------------------------------
@@ -648,7 +666,15 @@ class SampleSheetMerger:
648
666
  writer: Any,
649
667
  result: MergeResult,
650
668
  ) -> None:
651
- """Run SampleSheetValidator on the merged writer content."""
669
+ """Run SampleSheetValidator on the merged writer content.
670
+
671
+ Validator findings whose code is in ``_PRE_MERGE_CONFLICT_CODES`` or
672
+ ``_PRE_MERGE_WARNING_CODES`` are dropped here, matched by code only, so
673
+ issues covered by the pre-merge checks are not duplicated in the final
674
+ ``MergeResult``. The pre-merge messages are more descriptive (they
675
+ include source file names and sample IDs) so they are always
676
+ preferred over the generic post-merge validator output.
677
+ """
652
678
  import tempfile
653
679
 
654
680
  content = writer.to_string()
@@ -677,7 +703,19 @@ class SampleSheetMerger:
677
703
  Path(tmp_path).unlink(missing_ok=True)
678
704
 
679
705
  for w in vresult.warnings:
706
+ if w.code in _PRE_MERGE_WARNING_CODES:
707
+ logger.debug(
708
+ f"Suppressing duplicate post-merge warning {w.code!r} "
709
+ "(already reported by pre-merge cross-sheet check)."
710
+ )
711
+ continue
680
712
  result.add_warning(w.code, w.message, **w.context)
681
713
 
682
714
  for e in vresult.errors:
715
+ if e.code in _PRE_MERGE_CONFLICT_CODES:
716
+ logger.debug(
717
+ f"Suppressing duplicate post-merge conflict {e.code!r} "
718
+ "(already reported by pre-merge cross-sheet check)."
719
+ )
720
+ continue
683
721
  result.add_conflict(e.code, e.message, **e.context)